Content:
The data used in this project is of animal sightings on the trail cameras around the Duke forest and the spatial data of the Duke forest;it was collected as part of an ongoing project monitoring the white-tail deer population.
The data used in this project is of animal sightings on the trail cameras around the Duke forest and the spatial data of the Duke forest;it was collected as part of an ongoing project monitoring the white-tail deer population.
To conduct a deer survey with trail cameras, one camera must be placed every 160 or so acres. (Source: Thomas Jr., L. (2012, April 19). How to run a trail-camera survey. Quality Deer Management Association.) The Duke forest is 7,000 acres.(Source: Duke University. (n.d.). Duke Forest – Teaching and Research Laboratory. Retrieved from https://dukeforest.duke.edu) There are 30 trail cameras in the Duke forest along known migration paths.
library(here)
here() #get working directory
## [1] "/Users/sophiemoyer/Documents/EDE-Final"
knitr::include_graphics(here("Images", "buck_day.jpg"))
#import packages
library(tidyverse)
library(lubridate)
library(viridis)
library(rvest);
library(dataRetrieval)
library(dplyr)
library(readr)
library(stringr)
library(sf);
library(mapview); mapviewOptions(fgb = FALSE)
library(RColorBrewer)
#install.packages("AICcmodavg")
library(AICcmodavg)
##install.packages("multcompView")
library(multcompView)
custom.theme <- function() {
theme_minimal() +
theme(
panel.background = element_rect(fill = "seashell"),
panel.grid.major = element_line(colour = "bisque2", linetype = "dashed"),
axis.line = element_line(colour = "black", size = 0.5),
axis.text = element_text(size = 10, color = "salmon3", angle = 10),
axis.title = element_text(size = 12, face = "bold", color = "salmon4"),
plot.title = element_text(size = 16, color = "black", face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 12, color = "gray", hjust = 0.5),
legend.text = element_text(size = 10, color = "salmon3"),
legend.title = element_text(size = 12, color = "salmon4", face = "bold"),
legend.position = "right"
)
}
#read excel file
trailcam_csv <- read.csv(here("Data", "Raw", "sequences.csv"))
moon_phases_csv <- read.csv(here("Data", "Raw", "moon_phases.csv"))
#date as objects
trailcam_csv$start_time <- ymd_hms(trailcam_csv$start_time)
trailcam_csv$end_time <- ymd_hms(trailcam_csv$end_time)
moon_phases_csv$start_date <- as.Date(moon_phases_csv$start_date)
#separate date and time
trailcam_csv$start_date <- as.Date(trailcam_csv$start_time)
trailcam_csv$start_time <- format(trailcam_csv$start_time, "%H")
trailcam_csv$month <- month(as.Date(trailcam_csv$start_date))
trailcam_csv$month_name <- month.name[trailcam_csv$month]
trailcam_csv$end_date <- as.Date(trailcam_csv$end_time)
trailcam_csv$end_time <- format(trailcam_csv$end_time, "%H")
#filter for white- tailed deer, select relevant columns, and isolate trail camera id number.
deer_data <- trailcam_csv %>%
filter(common_name == "White-tailed Deer") %>%
select(common_name, deployment_id, start_date, start_time, group_size, month, month_name) %>%
mutate(cam_id = as.numeric(str_extract(deployment_id, "\\d+")),
division = str_extract(deployment_id, "\\((.*?)\\)"))
#cam_id refers to the camera id number
#mutate(cam_id = as.numeric(str_extract(deployment_id, "\\d+"))) taken from stackoverflow
#read file with trail camera coordinates (delivered as a .xlsx and converted to a .csv)
cam_coords <- read.csv(here("Data", "Raw", "camera_coords.csv"))
#Select id number and coordinates
cam_coordinates <- cam_coords %>%
mutate(cam_id = as.numeric(str_extract(deployment_id, "\\d+")))
#join deer data with trail camera coordinate data to find where cameras are, and clean up data
deer_cam_data <- left_join(deer_data, cam_coordinates, by = "cam_id") %>%
select(common_name, start_date, start_time, group_size, cam_id, division, longitude, latitude, month, month_name)
deer_cam_data <- left_join(deer_cam_data, moon_phases_csv, by = "start_date")
#categorize hours into groups
deer_cam_data$start_time <- as.numeric(deer_cam_data$start_time)
categorize_time <- function(hour) {
ifelse(hour >= 6 & hour < 12, "Morning",
ifelse(hour >= 12 & hour < 20, "Afternoon", "Evening"))
}
deer_cam_data$time_category <- cut(deer_cam_data$start_time,
breaks = c(-Inf, 5.99, 11.99, 19.99, Inf),
labels = c("Evening", "Morning", "Afternoon", "Evening"),
right = FALSE)
categorize_moon_phase <- function(phase) {
phase_lower <- tolower(phase)
if (grepl("full", phase_lower) || grepl("gibbous", phase_lower)) {
return("FullAndGibbous")
} else if (grepl("new", phase_lower) || grepl("crescent", phase_lower)) {
return("NewAndCrescent")
} else {
return("QuarterMoon")
}
}
deer_cam_data$moon_type <- sapply(deer_cam_data$moon_phase, categorize_moon_phase)
#read Duke forest boundary shapefile into project
#here("Duke University/Documents/EDE_Fall2023/DukeForest-TackRiosMoyer/duke-forest-spatial-data/duke-forest-spatial-data/Boundary")
#forest_sf <- st_read("Duke_Forest_Boundary_Mar2022.shp")
#mapview(forest_sf)
#adding a new column - categorical time of day
deer_data <- deer_data %>% mutate(
TOD = case_when(
start_time %in% c(20, 21, 22, 23, "00", "01", "02", "03") ~ 'night ',
start_time %in% c("04", "05", "06", "07", "08", "09", 10, 11) ~ 'morning',
start_time %in% c(12, 13, 14, 15, 16, 17, 18, 19) ~ 'day',
TRUE ~ NA_character_
)
)
#categorical variable for moon phase
deer_data <- inner_join(deer_data, moon_phases_csv, by = "start_date")
#convert coordinates to a spatial dataframe
#deer_cam_data_sf <- deer_cam_data_sf %>% st_as_sf(coords = c("Longtitude","Latitude"), crs=4326)
Before performing any statistical tests, we should first look at the variables we hope to analyze to determine any initial observations about the relationship.
deer_data <- deer_data %>%
arrange(month, start_time)
# Calculate sum of sightings by month and hour
deer_hours <- deer_data %>%
group_by(month, start_time, month_name) %>%
mutate(sightings = 1, .groups = 'drop') %>%
summarise(sightings = sum(sightings), .groups = 'drop')
# Create a line plot using ggplot2
sightings <- ggplot(deer_hours, aes(x = start_time, y = sightings, group = month, color = as.factor(month_name))) +
geom_line() +
labs(title = "Deer Sightings", x = "Hour", y = "Sightings") +
scale_color_discrete(name = "Month") + custom.theme() + facet_wrap(~ month_name, ncol = 1)
sightings
As the plot suggests, there are some peaks in deer sightings that can be
observed over the course of a day. Most notably, in April there is a
large spike of deer sightings in the early morning and evening. This
plot connects with our first hypothesis: “The time of day has no
effect on deer observations”. In the next section, we will select a
statistical test to run on these variables to determine if there is any
relationship present.
#testing hypothesis one - time of day has no effect on deer observations
time.one.way <- aov(group_size ~ TOD, data = deer_data)
one.way.result <- summary(time.one.way)
timeone <- as.data.frame(one.way.result[[1]])
##knitr::kable(timeone, format = "html", digits = 3)
gt::gt(data = timeone) %>% gt::tab_header(title = gt::md("**One-Way ANOVA Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day*"))
| One-Way ANOVA Results | ||||
| Determining relationship between deer sightings and time of day | ||||
| Df | Sum Sq | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|
| 2 | 3.726723 | 1.8633613 | 3.929286 | 0.02003825 |
| 800 | 379.379130 | 0.4742239 | NA | NA |
#testing hypothesis three - moon phase has no impact on deer observations
moon.one.way <- aov(group_size ~ moon_phase, data = deer_data)
moon.one <- summary(moon.one.way)
moontable <- as.data.frame(moon.one[[1]])
##knitr::kable(moontable, format = "html", digits = 3)
gt::gt(data = moontable) %>% gt::tab_header(title = gt::md("**One-Way ANOVA Results**"), subtitle = gt::md("Determining relationship between *deer sightings and moonphase*"))
| One-Way ANOVA Results | ||||
| Determining relationship between deer sightings and moonphase | ||||
| Df | Sum Sq | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|
| 7 | 3.637784 | 0.5196834 | 1.088756 | 0.3683484 |
| 795 | 379.468069 | 0.4773183 | NA | NA |
#moon and time test
moon.and.time <- aov(group_size ~ TOD + moon_phase, data = deer_data)
moontime <- summary(moon.and.time)
two.way.table <- as.data.frame(moontime[[1]])
##knitr::kable(two.way.table, format = "html", digits = 3)
gt::gt(data = two.way.table) %>% gt::tab_header(title = gt::md("**Two-Way ANOVA Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day/moonphase*"))
| Two-Way ANOVA Results | ||||
| Determining relationship between deer sightings and time of day/moonphase | ||||
| Df | Sum Sq | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|
| 2 | 3.726723 | 1.8633613 | 3.935447 | 0.01991973 |
| 7 | 3.908303 | 0.5583290 | 1.179199 | 0.31211213 |
| 793 | 375.470827 | 0.4734815 | NA | NA |
#interaction test
moon.time <- aov(group_size ~ TOD*moon_phase, data = deer_data)
interaction <- summary(moon.time)
interactiontable <- as.data.frame(interaction[[1]])
##knitr::kable(interactiontable, format = "html", digits = 3)
gt::gt(data = interactiontable) %>% gt::tab_header(title = gt::md("**Two-Way ANOVA with *Interactions* Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day/moon phase*"))
| Two-Way ANOVA with Interactions Results | ||||
| Determining relationship between deer sightings and time of day/moon phase | ||||
| Df | Sum Sq | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|
| 2 | 3.726723 | 1.8633613 | 3.8893718 | 0.02085667 |
| 7 | 3.908303 | 0.5583290 | 1.1653935 | 0.32029660 |
| 14 | 2.259285 | 0.1613775 | 0.3368414 | 0.98913864 |
| 779 | 373.211542 | 0.4790906 | NA | NA |
#maybe the division locations are adding a block?
moon.time.block <- aov(group_size ~ TOD*moon_phase + division, data = deer_data)
blocking <- summary(moon.time.block)
blockingtable <- as.data.frame(blocking[[1]])
##knitr::kable(blockingtable, format = "html", digits = 3)
gt::gt(data = blockingtable) %>% gt::tab_header(title = gt::md("**Two-Way ANOVA with *Blocking* Results**"), subtitle = gt::md("Determining relationship between *deer sightings and time of day/moon phases* with division blocking"))
| Two-Way ANOVA with Blocking Results | ||||
| Determining relationship between deer sightings and time of day/moon phases with division blocking | ||||
| Df | Sum Sq | Mean Sq | F value | Pr(>F) |
|---|---|---|---|---|
| 2 | 3.7267226 | 1.86336128 | 3.8815298 | 0.02102028 |
| 7 | 3.9083030 | 0.55832900 | 1.1630437 | 0.32170415 |
| 2 | 0.1820533 | 0.09102663 | 0.1896157 | 0.82731525 |
| 14 | 2.2833399 | 0.16309571 | 0.3397413 | 0.98866368 |
| 777 | 373.0054343 | 0.48005847 | NA | NA |
The one-way ANOVA (Analysis of Variance) results suggest that
there is a statistically significant difference among the means of the
groups defined by the variable TOD (Time of Day).
Here’s how to interpret the output:
TOD: 2 degrees of freedom.Residuals: 800 degrees of freedom.TOD: 3.7Residuals: 379.4TOD: 1.8634Residuals: 0.4742TOD divided by the Mean
Square for Residuals.
Interpretation: The p-value (Pr(>F)) is less than
0.05, indicating that there is evidence to reject the null
hypothesis. Therefore, there is a statistically significant
difference in means among the groups defined by TOD.
However, the specific interpretation of which groups are different would
require further post-hoc tests or examination of the group means.
It’s also worth noting that the effect size and the context of our study should be considered when interpreting the practical significance of the results.
model.set <- list(time.one.way, moon.and.time, moon.time, moon.time.block)
model.names <- c("one.way", "two.way", "interaction", "blocking")
aictab(model.set, modnames = model.names)
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## one.way 4 1684.76 0.00 0.95 0.95 -838.36
## two.way 11 1690.73 5.97 0.05 1.00 -834.20
## interaction 25 1715.22 30.46 0.00 1.00 -831.77
## blocking 27 1719.06 34.30 0.00 1.00 -831.55
#based on the AIC, the one-way ANVOA test is the best model. The one.way test has 95% of the AIC weight - meaning it explains 95% of the total variation in the dependent variable (group_size)
plot(time.one.way)
tukey.one.way <- TukeyHSD(time.one.way)
tukey.one.way
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = group_size ~ TOD, data = deer_data)
##
## $TOD
## diff lwr upr p adj
## morning-day 0.09096293 -0.0441878 0.22611366 0.2547153
## night -day -0.07540230 -0.2225017 0.07169713 0.4513530
## night -morning -0.16636523 -0.3073211 -0.02540937 0.0157434
plot(tukey.one.way, las = 1)
The results of the Tukey Honest Significant Difference (HSD) test
provide insights into the pairwise differences between the levels of the
TOD variable (Time of Day) with respect to the
group_size variable. Let’s break down the output:
diff: This column represents the differences in
means between the levels of TOD.
lwr and upr: These columns indicate the lower and upper bounds of the 95% confidence interval for the differences. If this interval includes zero, the difference is not considered statistically significant.
p adj (adjusted p-value): This column shows the adjusted p-values after correcting for multiple comparisons.
Now, let’s interpret the results for each pairwise comparison:
group_size between night and morning.Interpretation: The Tukey HSD test suggests
that the group_size differs significantly between night and
morning, while there is no significant difference between morning and
day or night and day. Keep in mind that the interpretation of
p-values depends on the chosen significance level (commonly 0.05), and
adjustments may be made for multiple comparisons.
#Bringing in Location data
#duke_forest<- st_read("/home/guest/module1/DukeForest-TackRiosMoyer/Data/Raw/Duke_Forest_Boundary_Mar2022.shp")
#converting camera points to locations
cameras.sf <- st_as_sf(cam_coordinates, coords = c("longitude","latitude"),
crs=4326)
#wrangling to include number of deer sightings at each location
#Plotting Camera sites and number of dear sighings at each one
mapview(cameras.sf,cex = 4, map.types="OpenStreetMap.Mapnik")
#mapview(cameras.sf,cex = "sightings",map.types="OpenStreetMap.Mapnik")
#plotting dear sighings at each camera trap for different times of the day
#custom theme to use for plots
custom.theme <- function() {
theme_minimal() +
theme(
panel.background = element_rect(fill = "seashell"),
panel.grid.major = element_line(colour = "bisque2", linetype = "dashed"),
axis.line = element_line(colour = "black", size = 0.5),
axis.text = element_text(size = 10, color = "salmon3", angle = 10),
axis.title = element_text(size = 12, face = "bold", color = "salmon4"),
plot.title = element_text(size = 16, color = "black", face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 12, color = "gray", hjust = 0.5),
legend.text = element_text(size = 10, color = "salmon3"),
legend.title = element_text(size = 12, color = "salmon4", face = "bold"),
legend.position = "right"
)
}
#scatter plot of herd size and time of day
scatter.deer.time.herd <-
ggplot(deer_cam_data, aes(x = start_time, y = group_size)) +
geom_point() +
geom_smooth(method = loess, color="black") +
labs(
title = "Herd Size Observed Based on Time of Day",
x = "Time of Day (hour)",
y = "Herd Size"
) +
custom.theme()
print(scatter.deer.time.herd)
#insert image of phases of the moon for reference purposes
knitr::include_graphics(here("Images", "moon_phases.png"))
#scatter plot of herd size and moon phase
scatter.deer.moon.herd <-
ggplot(deer_cam_data, aes(x = moon_phase, y = group_size)) +
geom_point() +
labs(
title = "Herd Size Observed Based on Moon Phase",
x = "Moon Phase",
y = "Group Size"
) +
scale_x_discrete(labels = c("new moon", "waxing crescent", "first quarter", "waxing gibbous", "full moon", "waning gibbous", "third quarter", "waning crescent")) +
scale_y_continuous(breaks = seq(0, ceiling(max(deer_cam_data$group_size)), by = 1)) +
custom.theme()
#scale_x_discrete to get labels in order that I want them to be
print(scatter.deer.moon.herd)
#scatter plot of moon phase and time of day
scatter.deer.moon.time <-
ggplot(deer_cam_data, aes(x = moon_phase, y = start_time)) +
geom_point() +
geom_smooth(method = lm, color="black") +
labs(
title = "Deer Observed Based on Moon Phase & Time of Day",
x = "Moon Phase",
y = "Time of Day (hour)"
) +
custom.theme()
print(scatter.deer.moon.time)
#scatter plot of month and time of day, sorted by herd size
scatter.deer.time.month <-
ggplot(deer_cam_data, aes(x = start_date, y = start_time, color = group_size)) +
geom_point() +
geom_smooth(method = lm, color="black") +
labs(
title = "Deer Observed Based on Month & Time of Day",
subtitle = "Categorized by Herd Size",
x = "Month",
y = "Time of Day (hour)",
color = "Herd Size"
) +
custom.theme()
print(scatter.deer.time.month)
scatter.deer.time.day.moon <-
ggplot(deer_cam_data, aes(x = start_date, y = moon_type, color = time_category)) +
geom_point() +
labs(
title = "Deer Observed Based on Month & Moon Phase",
subtitle = "Categorized by Time of Day",
x = "Month by Date",
y = "Moon Phase (type)",
color = "Time of Day (type)"
) +
custom.theme()
print(scatter.deer.time.day.moon)
deer.moon.time.heatmap <-
ggplot(deer_cam_data, aes(x = moon_phase, y = start_time, fill = group_size)) +
geom_tile() +
scale_fill_gradient(low = "yellow", high = "red") +
labs(
title = "Deer Observed Based on Moon Phase & Time of Day",
subtitle = "Characterized by Herd Size",
x = "Moon Phase",
y = "Time of Day (hour)",
fill = "Herd Size"
) +
custom.theme()
print(deer.moon.time.heatmap)
deer.cam.time.heatmap <-
ggplot(deer_cam_data, aes(x = start_time, y = cam_id, fill = group_size)) +
geom_tile() +
scale_fill_gradient(low = "yellow", high = "red") +
labs(
title = "Deer Observed Based on Time of Day & Camera",
subtitle = "Characterized by Herd Size",
x = "Time of Day (hour)",
y = "Camera ID",
fill = "Herd Size"
) +
scale_x_continuous(breaks = seq(min(deer_cam_data$start_time), max(deer_cam_data$start_time), by = 2)) +
scale_y_continuous(breaks = seq(min(deer_cam_data$cam_id), max(deer_cam_data$cam_id), by = 5)) +
custom.theme()
print(deer.cam.time.heatmap)